##########################################################
# Project:    Talking to the Populist Radical Right
# Task:       The script counts the mentions of parties in 
#             speeches in the Danish parliament and
#             annotates the data set
# Author:     Jan Schwalbach (21/07/2022)
##########################################################

# Loading packages and data

library(quanteda)
library(ggrepel)
library(grid)
library(zoo)
library(pscl)
library(aod)
library(ggplot2)
library(stringr)

# The .Rdata file is expected to provide the data frame `corpusDK`, which all
# following steps modify in place.
load(file = "Corpus_Denmark.Rdata")

# Deleting parties that are not analysed and the speaker
#
# Every party code listed here is removed from the corpus, one code at a time
# and in the order given.
dropped_parties <- c(
  "-", "SP", "LA", "T", "ALT", "UFG", "KD", "TF", "SIU", "IA", "LH",
  "FRI", "KRF", "UP", "CD", "FP", "NY", "FF", "REP", "SIUMUT", "NQ", "JF"
)
for (party_code in dropped_parties) {
  corpusDK <- corpusDK[corpusDK$party != party_code, ]
}

# Rows whose `type` equals "0" are removed as well
# (presumably the contributions of the presiding speaker -- confirm).
corpusDK <- corpusDK[corpusDK$type != "0", ]

# Annotating speeches for topics
#
# A speech gets the topic flag 1 when its presence indicator equals "TRUE" or
# when its topic count is larger than 2. Speeches with a topic count of exactly
# 0 are reset to 0 afterwards, whatever the presence indicator says.
# which() discards comparisons that evaluate to NA, so rows with missing
# values simply keep the flag they already have.

corpusDK$immigration <- 0
corpusDK$immigration[which(corpusDK$migpres1 == "TRUE")] <- 1
corpusDK$immigration[which(corpusDK$migrationcount > 2)] <- 1
corpusDK$immigration[which(corpusDK$migrationcount == 0)] <- 0

corpusDK$education <- 0
corpusDK$education[which(corpusDK$educationpres1 == "TRUE")] <- 1
corpusDK$education[which(corpusDK$educationcount > 2)] <- 1
corpusDK$education[which(corpusDK$educationcount == 0)] <- 0

# Subsetting for the first three periods after entry
#
# Only speeches dated from 1998-03-11 up to and including 2007-11-13 are kept.

in_study_window <- corpusDK$sessiondate >= "1998-03-11" &
  corpusDK$sessiondate <= "2007-11-13"
corpusDK <- corpusDK[in_study_window, ]

# Loading the dictionaries and counting for each party
#
# Every party has its own semicolon-separated dictionary file "<party>.csv"
# (read as WINDOWS-1252) whose search terms sit in a column named after the
# party. For each party the speeches receive a `<party>count` column (number
# of pattern matches in `text`) and a `<party>pres` column (TRUE when the
# count is above zero).

# Build the search pattern for one party.
#
# party: name of the dictionary file (without ".csv") and of the column that
#        holds the search terms.
# Returns a single regular expression in which the trimmed, non-empty terms
# are alternatives, each of them required to have one space directly before
# and one space directly after it.
# Stops with an error when the dictionary file has no such column.
read_party_pattern <- function(party) {
  dictionary <- read.csv(
    file = paste0(party, ".csv"), fileEncoding = "WINDOWS-1252",
    stringsAsFactors = FALSE, header = TRUE, sep = ";"
  )
  # exact = FALSE keeps the partial column-name matching that `$` performs.
  raw_terms <- dictionary[[party, exact = FALSE]]
  if (is.null(raw_terms)) {
    stop("Column '", party, "' not found in ", party, ".csv", call. = FALSE)
  }
  terms <- str_trim(raw_terms, side = "both")
  paste0(" ", paste(terms[terms != ""], collapse = " | "), " ")
}

# The pattern of each party stays available under the party's name.
S <- read_party_pattern("S")
corpusDK$Scount <- str_count(corpusDK$text, S)
corpusDK$Spres <- corpusDK$Scount > 0

EL <- read_party_pattern("EL")
corpusDK$ELcount <- str_count(corpusDK$text, EL)
corpusDK$ELpres <- corpusDK$ELcount > 0

DF <- read_party_pattern("DF")
corpusDK$DFcount <- str_count(corpusDK$text, DF)
corpusDK$DFpres <- corpusDK$DFcount > 0

SF <- read_party_pattern("SF")
corpusDK$SFcount <- str_count(corpusDK$text, SF)
corpusDK$SFpres <- corpusDK$SFcount > 0

KF <- read_party_pattern("KF")
corpusDK$KFcount <- str_count(corpusDK$text, KF)
corpusDK$KFpres <- corpusDK$KFcount > 0

V <- read_party_pattern("V")
corpusDK$Vcount <- str_count(corpusDK$text, V)
corpusDK$Vpres <- corpusDK$Vcount > 0

# Matches of the "Radikale" dictionary are subtracted from the V count and the
# V presence flag is recomputed from the corrected count.
# NOTE(review): presumably the Radikale terms contain a V search term and
# would otherwise be counted as V mentions -- confirm against the CSV files.
Radikale <- read_party_pattern("Radikale")
corpusDK$Radikalecount <- str_count(corpusDK$text, Radikale)
corpusDK$Vcount <- corpusDK$Vcount - corpusDK$Radikalecount
corpusDK$Vpres <- corpusDK$Vcount > 0

RV <- read_party_pattern("RV")
corpusDK$RVcount <- str_count(corpusDK$text, RV)
corpusDK$RVpres <- corpusDK$RVcount > 0

# Year of the speech: the "-MM-DD" part of the session date is stripped and
# the remainder converted to a number. `partyyear` joins party and year with
# a single space.
corpusDK$year <- as.numeric(
  gsub("-[0-9][0-9]-[0-9][0-9]", "", corpusDK$sessiondate, fixed = FALSE)
)
corpusDK$partyyear <- paste(corpusDK$party, corpusDK$year)

# Total mentions of each target party, summed over the speeches of each
# speaking party.
#
# counts:     per-speech mention counts of one target party.
# label:      column name given to the summed counts.
# keep_party: keep the grouping column `Category` (the speaking party)?
# Returns a data frame with one row per speaking party.
sum_by_speaker_party <- function(counts, label, keep_party = FALSE) {
  totals <- aggregate(counts, by = list(Category = corpusDK$party), FUN = sum)
  names(totals)[2] <- label
  if (!keep_party) {
    totals[1] <- NULL
  }
  totals
}

# Only the DF table keeps the `Category` column; the remaining tables consist
# of the summed counts alone.
DFcount <- sum_by_speaker_party(corpusDK$DFcount, "DF", keep_party = TRUE)
ELcount <- sum_by_speaker_party(corpusDK$ELcount, "EL")
Scount <- sum_by_speaker_party(corpusDK$Scount, "S")
SFcount <- sum_by_speaker_party(corpusDK$SFcount, "SF")
KFcount <- sum_by_speaker_party(corpusDK$KFcount, "KF")
Vcount <- sum_by_speaker_party(corpusDK$Vcount, "V")
RVcount <- sum_by_speaker_party(corpusDK$RVcount, "RV")

# Combining all party counts
#
# The per-party tables are bound side by side; `party` is a copy of the
# `Category` column contributed by the DF table.

partycounts <- cbind(Scount, DFcount, SFcount, RVcount, ELcount, Vcount, KFcount)
partycounts$party <- partycounts$Category
# Copy taken before the self-mentions are zeroed out below.
partycounts1 <- partycounts

# A party's mentions of itself are set to 0.
counted_parties <- c("DF", "EL", "KF", "RV", "S", "SF", "V")
for (own_party in counted_parties) {
  partycounts[[own_party]][partycounts$party == own_party] <- 0
}
partycounts$Category <- NULL

# Share of DF mentions among all counted party mentions of a speaking party.
mentions_total <- Reduce(`+`, partycounts[counted_parties])
partycounts$DF_Share <- partycounts$DF / mentions_total

# Counting the shares for all/left/right parties
#
# DF itself and rows without a defined DF share are left out. The "left"
# table is everything except V and KF; the "right" table is everything except
# S, SF, RV and EL.

countsall <- partycounts[partycounts$party != "DF", ]
countsall <- countsall[!is.na(countsall$DF_Share), ]

countsleft <- countsall[!(countsall$party %in% c("V", "KF")), ]
countsright <- countsall[!(countsall$party %in% c("S", "SF", "RV", "EL")), ]

# Average DF share per group.
mean(countsleft$DF_Share)
mean(countsright$DF_Share)
mean(countsall$DF_Share)

# Preparing and annotating the data set for the logit regression
#
# The lower date bound is applied once more and the speeches given by DF are
# removed from the corpus.

corpusDK <- corpusDK[corpusDK$sessiondate >= "1998-03-11", ]
corpusDK <- corpusDK[corpusDK$party != "DF", ]

# Left parties: 1 for S, SF, RV and EL, otherwise 0

corpusDK$left <- as.numeric(corpusDK$party %in% c("S", "SF", "RV", "EL"))

# Mainstream parties: 1 for S and V, otherwise 0

corpusDK$mainstream <- as.numeric(corpusDK$party %in% c("S", "V"))

# Dependent variable: the DF mention count with every value of 1 or more
# replaced by 1.
corpusDK$RWdummy <- replace(corpusDK$DFcount, corpusDK$DFcount >= 1, 1)

# Term
#
# Number of the term a speech belongs to: 1 by default, 2 from 2001-11-20,
# 3 from 2005-02-08. `country` is the constant "Denmark".

corpusDK$term <- 1
corpusDK$country <- "Denmark"
corpusDK$term[corpusDK$sessiondate >= "2001-11-20" & corpusDK$sessiondate < "2005-02-08"] <- 2
# The upper bound is inclusive so that it agrees with the `<= "2007-11-13"`
# subset applied to the corpus further up. With a strict `<`, speeches dated
# 2007-11-13 stayed in the data but silently kept the default term 1.
corpusDK$term[corpusDK$sessiondate >= "2005-02-08" & corpusDK$sessiondate <= "2007-11-13"] <- 3
table(corpusDK$term)

# Government Type/party and right-wing populist size
#
# `minority` is 1 for every speech. `RRP_size` is looked up by term number
# and stays 0 for any other term value. `support` is 1 in terms 2 and 3 and
# 0 otherwise.

corpusDK$minority <- 1

corpusDK$RRP_size <- 0
rrp_size_by_term <- c(7.3, 12.3, 13.4)
for (term_no in seq_along(rrp_size_by_term)) {
  corpusDK$RRP_size[corpusDK$term == term_no] <- rrp_size_by_term[term_no]
}

corpusDK$support <- as.numeric(corpusDK$term %in% c(2, 3))

# Party codes still present in the corpus.
unique(corpusDK$party)

# Recode "S" to "SOC" and "V" to "VDK".
# NOTE(review): assumes `party` is a character column -- confirm.
corpusDK$party <- replace(corpusDK$party, corpusDK$party == "S", "SOC")
corpusDK$party <- replace(corpusDK$party, corpusDK$party == "V", "VDK")

# Government status: SOC and RV are coded as "Government" in term 1, VDK and
# KF in terms 2 and 3; every other speech is coded as "Opposition".
corpusDK$Government <- "Opposition"
in_government <-
  (corpusDK$party %in% c("SOC", "RV") & corpusDK$term %in% 1) |
  (corpusDK$party %in% c("VDK", "KF") & corpusDK$term %in% c(2, 3))
corpusDK$Government[in_government] <- "Government"

# Keep only the listed variables and write the result to disk as the object
# `Denmark`.
model_columns <- c(
  "RWdummy", "immigration", "left", "education", "mainstream", "type",
  "term", "minority", "RRP_size", "support", "Government", "country"
)
Denmark <- corpusDK[, model_columns, drop = FALSE]
save(Denmark, file = "Denmark_party_mentions.Rdata") # save corpus